"""
freebuf爬虫
"""
from library import library as clib
from crawler import baseScrawler
from datetime import datetime
import json

# Module-level connection helper, shared by all crawler instances in this file.
connect = clib.Connect()


class freebuf(baseScrawler.baseScrawler):
    """Crawler for freebuf.com: collects articles published after start_date.

    Inherits paging state (``mark``), the dedup helper (``unique``), the
    result containers (``final_all``, ``allurl``) and the tag map
    (``tag_list``) from the base class — assumed from usage; confirm in
    baseScrawler.
    """

    def __init__(self, start_date, num):
        """Initialize with a date threshold and a result-count limit.

        :param start_date: datetime; articles dated on/before this are skipped
                           and stop the paging loop.
        :param num: number of top results to keep (passed to the filter).
        """
        super().__init__(start_date, num)
        self.mainUrl = "https://www.freebuf.com"

    def json_parse(self, json_text):
        """Parse one page of the category-list JSON into self.final_all.

        Sets ``self.mark = 1`` (stop paging) when the API returns code 400
        or when an article dated on/before ``self.start_date`` is reached.

        :param json_text: raw JSON response body from the list endpoint.
        """
        res_dict = json.loads(json_text)
        # API signals "no more pages / bad page" with code 400.
        if res_dict['code'] == 400:
            self.mark = 1
            return

        for item in res_dict['data']['data_list']:
            dic = {
                'article_link': self.mainUrl + item['url'],
                'article_title': item['post_title'],
                'author_link': self.mainUrl + "/author/" + item['username'],
                'author_name': item['username'],
                'classification': item['category'],
                'time': item['post_date'][:10],  # keep only the YYYY-MM-DD part
                'page_view': item['read_count'],
            }

            post_time = datetime.strptime(dic['time'], "%Y-%m-%d")
            if post_time <= self.start_date:
                # Articles are newest-first, so the first old article
                # means everything after it is old too — stop paging.
                self.mark = 1
                break
            if self.unique(dic):
                self.final_all.append(dic)
                self.allurl.append(dic['author_link'])

    def start(self):
        """Crawl every tag in self.tag_list and rank results into self.res.

        On a failed fetch the page is reported and skipped; self.res holds
        the top ``self.num`` entries by view count (empty list on failure).
        """
        for tag in self.tag_list.values():
            page = 0
            self.mark = 0

            # Page forward until json_parse (or a fetch error marker) sets mark.
            while self.mark == 0:
                page += 1
                currentUrl = self.mainUrl + (
                            "/fapi/frontend/category/list?name=%s&page=%d&limit=50&select=0&order=0&type=category" % (tag, page))
                ures = connect.request_with_no_flag(currentUrl).text

                # The connection helper injects this sentinel on failure.
                if ures.find('4ny0neSec_wrong') == -1:
                    self.json_parse(ures)
                else:
                    # TODO: record failed pages in a log instead of printing
                    print("-----------------" + currentUrl + " fail-----------------")

        self.res = self.filter.views_first(self.final_all, self.num)


